 
* Session setup: work from the output directory and drop every program
* currently in memory so the definitions below can be re-created.
cd "$output"
program drop _all

* Graphing preferences: one font face for all three graph devices, and the
* plotplainblind scheme stored as the permanent default.
foreach device in window eps ps {
	graph set `device' fontface "Franklin Gothic Book"
}
set scheme plotplainblind, perm

*grstyle init 
*grstyle color background white 
*grstyle linewidth major_grid none

**************** SWITCHES *********************
* Each switch gates one "if $switch==1 { ... }" section further down.
global programs_globals 1 //always leave on -- set up utilities for processing the data

global parent_voter_data           1
global setup_parent_analysisfile   1
global t_covbal_parent             1
global t_parent_vote               1

if $programs_globals==1{
*DEFINE GLOBALS
* Shared macros used by the estimation programs and table sections below.
* covariates: baseline demographic controls plus year-of-birth and year dummies
*   (yobdum* / yeardum* are created with -tab, ge()- in the analysis-file section).
* baselinetests: baseline test score variables.
	global covariates baselinefemale baselineblack baselinehisp baselineasian baselineotherrace ///
		baselinesped baselinelep baselinefrpl  yobdum* yeardum*
	global baselinetests baseline_e baseline_m
 
*Instruments
* Z1 = initial offer, Z2 = waitlist offer (both generated in the analysis-file section).
global Z1 initial_offer_v
global Z2 waitlist_offer_v
*risk sets
* db_* are the dummies produced by -tab lottogroupYR_boston, ge(db_)-.
global risksets db_*
*cluster variable
* NOTE(review): clustervar is defined here but none of the regressions visible in this file use it -- confirm.
global clustervar schoolXyear
***** Options for table formating *****
* Presumably passed to esttab/estout when tables are written -- the call is not visible here; confirm.
global opts	a f plain coll(none) nodep nomti c(b(star fmt(%9.3f)) se(abs par fmt(%9.3f))) star(+ .10 * .05 ** .01 *** .001) noobs
*******Standard table notes***************
* Text fragments for table footnotes.
* NOTE(review): note_sample says applications "in 2008 to 2016", while the analysis-file section keeps yearapp 2005-2016 -- confirm which is intended.
* NOTE(review): note_stars says standard errors are clustered by student, while parentreg uses -robust- without a cluster() option -- confirm.
global note_iv		"Each coefficient labeled 2SLS is the instrumental variables estimate of attending a Boston charter with a lottery and parent name information in the first two years after the lottery. " 
global note_z		"Indicator variables for a lottery offer on the day of the lottery (initial offer) and lottery offer off of the waitlist (waitlist offer) are the instruments for charter attendance. " 
global note_ccm 	"The control complier mean is labeled CCM. " 
global note_controls	"All regressions control for lottery risk sets and a vector of demographic characteristics. " 
global note_sample	"The sample is restricted to students enrolled Boston Public Schools or Boston charter schools who applied to charter schools in 2008 to 2016 in lotteries with parent name information.  "    
global note_stars	"Robust standard errors clustered by student are in parentheses (+ p$<$0.10 * p$<$0.05 ** p$<$0.01 ***p$<$0.001). " 
* Default endogenous variable (charter attendance indicator Dv).
global D Dv

*****programs*****


program define parentreg
	* 2SLS estimate of charter attendance on the outcome named in global Y,
	* plus the control complier mean (CCM).
	*
	* Reads globals: Y (outcome), Z1 Z2 (instruments), covariates, risksets.
	* Writes: global D (reset to Dv), global lab_<Y> (variable label of Y),
	*         variables oneminusD and <Y>_Y0, stored estimates iv_<Y> with
	*         the scalar e(ccm) attached.
	* Sample: parent_sample==1 & have_parent_name==1.
	*
	* NOTE(review): standard errors use -robust- only; the table note
	* (note_stars) says they are clustered by student -- confirm.

	*endogenous variable varies based on outcome
	*default is any attendance through 12th grade (including repeats) for SAT, AP, HS grad 5 and 6, and most voter and college outcomes
	global D Dv

	cap drop oneminusD
	g oneminusD = 1 - $D

	global lab_${Y} : variable label ${Y}

	* Drop a leftover <Y>_Y0 first so the program can be re-run for the same
	* outcome (same guard as for oneminusD above).
	cap drop ${Y}_Y0
	qui g ${Y}_Y0=${Y}*oneminusD

	* CCM: coefficient on (1-D) from 2SLS of Y*(1-D) on (1-D).
	qui ivreg2 ${Y}_Y0 (oneminusD = $Z1 $Z2) $covariates $risksets if  parent_sample==1 & have_parent_name==1 ///
		, partial($covariates  $risksets) robust 
	local ccm = _b[oneminusD]

	* Main 2SLS estimate, stored as iv_<Y> with the CCM attached.
	qui eststo iv_${Y}: ivreg2 ${Y} ($D = $Z1 $Z2) $covariates $risksets if  parent_sample==1 & have_parent_name==1 ///
		, partial($covariates  $risksets) robust
	qui estadd scalar ccm=`ccm'

end
  

program define coef 
	* Format the coefficient on ${D} from the ACTIVE estimation results.
	* Writes global b_<Y> = coefficient (%9.3f) followed by significance stars,
	*        global s_<Y> = standard error in parentheses.
	* Stars: + p<0.10, * p<0.05, ** p<0.01, *** p<0.001.
	local b = string( _b[${D}], "%9.3f")
	local se = string( _se[${D}], "%9.3f")

	* "esttab ." restricts the table to the active estimates. A bare -esttab-
	* tabulates every model stored by eststo, in which case column 3 of
	* r(coefs) is the p-value of the FIRST stored model, not of this one.
	qui esttab .
	mat r= r(coefs)
	mat r2 = r["${D}",3]
	local p=r2[1,1]	

	if `p'<0.001{
		local s="***"
	}
	else if `p'<0.01{
		local s="**"
	}	
	else if `p'<0.05{
		local s="*"
	}	
	else if `p'<0.1{
		local s="+"
	}
	else if `p'>=0.1{
		* also reached when p is missing, since missing compares as larger than any number
		local s=""
	}
	global b_${Y}="`b'`s'"
	global s_${Y}="(`se')"

end
program define coef_IO 
	* Format the coefficient on the initial-offer instrument ${Z1} from the
	* ACTIVE estimation results.
	* Writes global b1_<Y> = coefficient (%9.3f) followed by significance stars,
	*        global s1_<Y> = standard error in parentheses.
	* Stars: + p<0.10, * p<0.05, ** p<0.01, *** p<0.001.
	local b = string( _b[${Z1}], "%9.3f")
	local se = string( _se[${Z1}], "%9.3f")

	* "esttab ." restricts the table to the active estimates. A bare -esttab-
	* tabulates every model stored by eststo, in which case column 3 of
	* r(coefs) is the p-value of the FIRST stored model, not of this one.
	qui esttab .
	mat r= r(coefs)
	mat r2 = r["${Z1}",3]
	local p=r2[1,1]	

	if `p'<0.001{
		local s="***"
	}
	else if `p'<0.01{
		local s="**"
	}	
	else if `p'<0.05{
		local s="*"
	}	
	else if `p'<0.1{
		local s="+"
	}
	else if `p'>=0.1{
		* also reached when p is missing, since missing compares as larger than any number
		local s=""
	}
	global b1_${Y}="`b'`s'"
	global s1_${Y}="(`se')"
end
program define coef_WO 
	* Format the coefficient on the waitlist-offer instrument ${Z2} from the
	* ACTIVE estimation results.
	* Writes global b2_<Y> = coefficient (%9.3f) followed by significance stars,
	*        global s2_<Y> = standard error in parentheses.
	* Stars: + p<0.10, * p<0.05, ** p<0.01, *** p<0.001.
	local b = string( _b[${Z2}], "%9.3f")
	local se = string( _se[${Z2}], "%9.3f")

	* "esttab ." restricts the table to the active estimates. A bare -esttab-
	* tabulates every model stored by eststo, in which case column 3 of
	* r(coefs) is the p-value of the FIRST stored model, not of this one.
	qui esttab .
	mat r= r(coefs)
	mat r2 = r["${Z2}",3]
	local p=r2[1,1]	

	if `p'<0.001{
		local s="***"
	}
	else if `p'<0.01{
		local s="**"
	}	
	else if `p'<0.05{
		local s="*"
	}	
	else if `p'<0.1{
		local s="+"
	}
	else if `p'>=0.1{
		* also reached when p is missing, since missing compares as larger than any number
		local s=""
	}
	global b2_${Y}="`b'`s'"
	global s2_${Y}="(`se')"
end

program define coefccm
	* Format the control complier mean: the coefficient on oneminusD in the
	* active estimation results.
	* Writes global bc_<Y> = coefficient (%9.3f), deliberately WITHOUT stars,
	*        global sc_<Y> = standard error in parentheses.
	*
	* The earlier version also ran esttab and derived a significance-star
	* string that was never used; that dead code (and its dependence on the
	* set of stored estimates) has been removed.
	local b = string( _b[oneminusD], "%9.3f")
	local se = string( _se[oneminusD], "%9.3f")

	global bc_${Y}="`b'" // not putting stars for CCM
	global sc_${Y}="(`se')"

end

	
}

  if $parent_voter_data == 1{
  
* Builds $data/parent_voter.dta: the parent voting records plus registration
* timing flags and name-commonality measures. Two lookup files are created on
* the way ($data/common_lnames.dta and $data/common_fnames.dta).
* NOTE(review): the two Excel inputs use absolute E:\ paths rather than a path
* global like the rest of the file.

*clean up common names data
* Surnames: lname_rank runs from 1000 (RANK 1) down to 1 (RANK 1000).
import excel "E:\Charter\Raw Data\VOTER\Names_2010Census_Top1000.xlsx", sheet("2010_Top1000") firstrow clear
keep SURNAME RANK
ren SURNAME lname
g lname_rank = 1001-RANK
drop RANK

save "$data/common_lnames.dta", replace

* First names: the sheet holds a male and a female name per rank; reshape to
* one row per (rank, gender).
import excel "E:\Charter\Raw Data\VOTER\Top200_SSA.xlsx", sheet("1960s") firstrow clear
ren Name fname_male
ren Rank rank
ren D fname_female
drop Num E

reshape long fname_ , i(rank) j(gender) string
ren fname_ fname
replace fname=upper(fname)

g fname_rank = 201-rank
drop rank

* a few common across genders
* Keep the higher-ranked entry for names that appear under both genders.
bys fname: egen max = max(fname_rank)
keep if max == fname_rank
drop max
* Tie-break: a name with the SAME rank under both genders would otherwise
* survive as two rows and make the m:1 merge on fname below fail. Keep one row
* deterministically (the first by gender).
bys fname (gender): keep if _n == 1
save "$data/common_fnames.dta", replace


*voters	
use "$data/parent_voting_all.dta", clear
compress

* Registration timing relative to the lottery date.
* NOTE(review): a missing lotto_date compares as larger than any date, so a
* registered parent with no lotto_date is flagged "before" -- confirm whether
* lotto_date can be missing here.
g parent_reg_before_lotto = reg_date<lotto_date& reg_date!=.
g parent_reg_after_lotto = reg_date>=lotto_date & reg_date!=.


*name characteristics
g fname_length = length(fname) if fname!=""
g lname_length = length(lname) if lname!=""


merge m:1 lname using "$data/common_lnames.dta", keep(1 3) nogen

merge m:1 fname using "$data/common_fnames.dta", keep(1 3) nogen
* First-name ranks are multiplied by 5 before being added to surname ranks.
replace fname_rank = fname_rank*5

* NOTE(review): the sum is missing when EITHER component is unranked, so a
* parent with a ranked surname but an unranked first name ends up with
* name_rank 0 and uncommon_name 1 -- confirm this is intended.
gen name_rank=fname_rank +lname_rank
replace name_rank = 0 if fname!=""&lname!="" &name_rank==.

g uncommon_name = name_rank==0



save "$data/parent_voter.dta", replace //this includes some non-sample parents
  }
  
 if $setup_parent_analysisfile==1{


* Start from the wide applicant file: Boston lottery applicants, excluding
* applications to grade 3 and below, one row per sasid.
use "$data_setup/all_applicants_wide", clear
keep if gradeapp>3 & boston_lottery==1
duplicates drop
count
duplicates drop sasid, force

/* SCHOOLS WITH PARENT INFO (original working list)
   apr 2006-2013
   bos col 2009 and 2010
   bos prep 2010, 2012, 2013
   Coah I 2007, 2008, 2009, 2010, 2011, 2012
   Codman 2008, 2010, 2011, 2013
   Match HS 2002 - 2006
   Match 2011, 2012, 2013, 2014 (MS)
   Rox prep 2006, 2007, 2009, 2011
   Potential to add Match ES */

*************************************
*CAM LOOK HERE FOR LIST OF SCHOOLS CJA
*************************************

* keepflag marks the school-by-year lotteries retained in the sample.
g keepflag = 0
* APR: 2006-2010 suspect -- no parent info for initial offers or NO offers
replace keepflag = 1 if applyAPR==1 & inrange(yearapp, 2011, 2013)
* Boston Collegiate: 2010 has no info on initial or no offers
replace keepflag = 1 if applyBosCol==1 & yearapp==2009
* Boston Prep: 2006 suspect
replace keepflag = 1 if applyBosPrep==1 & inlist(yearapp, 2005, 2007, 2010, 2012, 2013)
* City on a Hill: exclude 2002 and 2004, not enough parent info -- data issue
replace keepflag = 1 if applyCoaH==1 & inrange(yearapp, 2005, 2012)
* Codman: 2013 suspect
replace keepflag = 1 if applyCodman==1 & inlist(yearapp, 2004, 2008, 2010, 2011)
* MATCH middle school and high school
replace keepflag = 1 if applyMATCH_MS==1 & inrange(yearapp, 2011, 2014)
replace keepflag = 1 if applyMATCH_HS==1 & inrange(yearapp, 2002, 2006)
* Roxbury Prep: 2006 and 2007 suspect
replace keepflag = 1 if applyRoxPrep==1 & (inrange(yearapp, 2002, 2005) | inrange(yearapp, 2008, 2009) | yearapp==2011)
*replace keepflag =1 if applyMATCH_ES == 1 &(yearapp>2011|yearapp<2013)

* some people came along in parent file even if they didn't have parent info from Old Boston files
keep if keepflag==1

* Attach SIMS baseline demographics, SIMS school attendance, and MCAS scores,
* in that order; applicants without a match in a file are kept.
foreach src in baselinedemos_voter sims_voter_wide mcas_wide {
	merge 1:1 sasid using "$data/`src'.dta", keep(1 3) nogen
}

*outcomes are processed more below
		
		
******************************************************************
**** Lottery setup ****
* Baseline variables
* Endogenous variables
* Risk sets
******************************************************************

	* Baseline setup: baseline grade, baseline MCAS scores, year-of-birth
	* dummies, post-lottery year/school and post-lottery year dummies.

	*baseline
	drop baselinegrade
	gen byte baselinegrade=gradeapp-1

	
	*baseline SCORES
	g baseline_m=.
	g baseline_e=.
	
	
	* NEW 3/23/2015 EMS - The most accurate way to define baseline is not the grade before application, but the application year
	* That way, if students apply to multiple grades (as in the PK and Kindergarteners) or if a student gets held back, their proper baseline scores are used
			*extremely minor differences, going with this method		

	* Take the score from whichever tested grade (4-8) was sat in the application year.
	foreach n in 4 5 6 7 8 {		
			replace baseline_m=c_state_mrawsc`n'  if yearapp==mcasyear`n'
			replace baseline_e=c_state_erawsc`n' if yearapp==mcasyear`n'
	}

	*fixes for mcas testing 
	* For application years through 2005 these overrides replace the values set above.
	replace baseline_e=c_state_erawsc7 if baselinegrade==8&yearapp<=2005
	replace baseline_e=c_state_erawsc4 if (baselinegrade>=5&baselinegrade<=7)&yearapp<=2005
	replace baseline_m=c_state_mrawsc4 if (baselinegrade>=5&baselinegrade<=7)&yearapp<=2005

	g hasbaseline_m=(baseline_m~=.)
	g hasbaseline_e=(baseline_e~=.)

	qui tab baselineyob, ge(yobdum)
	

	
	* post lotto year
	* postlottomasscode = school code in the grade applied to.
	g  postlottoyear=yearapp+1
	g postlottomasscode=masscode5 if gradeapp==5
		replace postlottomasscode=masscode6 if gradeapp==6
		* gradeapp==7 was missing from this list although grade-7 applicants
		* are handled everywhere else (charter-years totals, two-years-out
		* scores); without it they all fell into the 99999 bucket below.
		replace postlottomasscode=masscode7 if gradeapp==7
		replace postlottomasscode=masscode9 if gradeapp==9
		replace postlottomasscode=masscode4 if gradeapp==4
		replace postlottomasscode=99999 if postlottomasscode==. //if no masscode, can stay in data for clustering

	qui tab postlottoyear, ge(yeardum) //revisited because prior version conditioned on present in 10th grade

*****************RISK SETS *******************************
	*Risk sets based on lotteried sample schools
	*DEFINE STUDY SAMPLE HERE -- right now all lotteried charters
			*APR 4120530
			*BosCol 4490305
			*Boston Green Academy 4110305
			*Boston Prep 4160305
			*CoaH  4370505 and CoaHII  35040505 
			*Codman 4380505
			*EdBrooke (not 2 and 3) 4280305
			*Excel (not 3) 4100205
			*Match HS 4690505
			*Match MS 4690505
			*RoxPrep (not Uncommon or Grove Hall or DP) 4840505
		
		
*BOSTON
	* Risk sets: one group per (application year x priority group x set of
	* schools applied to), expanded into db_* dummies ($risksets).
	egen  lottogroupYR_boston=group(yearapp applyprioritygroup applyAPR applyBGA applyBosPrep applyBosCol ///
		applyMATCH_HS applyMATCH_MS applyCodman applyCoaH applyCoaHII applyExcel applyEdBrooke ///
		applyRoxPrep )
	qui tab lottogroupYR_boston, ge(db_)

	g boston_sample = 1
	
	*CREATE CLUSTER VARIABLE
	egen schoolXyear=group(postlottoyear postlottomasscode) //revisited because prior version conditioned on present in 10th grade

	* Long-term-outcome cluster: grade-10 school-by-year, falling back to
	* grade-9 school-by-year when grade 10 is unavailable.
	egen schoolXyearLTO=group(year10 masscode10) 
	qui tab year10, gen(yearLTOdum)
	egen blah= group(year9 masscode9)
	* Both egen group() calls number their groups from 1, so copying the
	* grade-9 ids over unchanged would merge unrelated grade-9 and grade-10
	* clusters. Shift the grade-9 ids past the largest grade-10 id first.
	qui su schoolXyearLTO, meanonly
	local lto_offset = cond(r(N)>0, r(max), 0)
	replace schoolXyearLTO=blah+`lto_offset' if schoolXyearLTO==.
	drop blah

	* Missing repeat indicators count as zero repeated years.
	foreach x of varlist *_repeats* {
		replace `x'=0 if `x'==.
	}
	
	* For those who apply in 9th, 5th, and 6th grades, separately add their total years in charter, then combine this variable
	* If people are in a charter middle school and then apply to a charter high school, we only want to consider their middle school 
		* lottery, but count all of their years in charter (even if they are in a charter in high school)
		* Since we took the earliest grade application for each student, this is what we've done.
	foreach b in blottocharter {

		* Years in charter from the application grade through grade 12:
		* yearly indicators plus repeated years, summed only for students whose
		* application grade matches.
		foreach g in 9 5 6 7 4 {
			local terms
			forvalues k = `g'/12 {
				local terms `terms' `b'`k' `b'_repeats`k'
			}
			egen `b'years`g' = rowtotal(`terms') if gradeapp==`g'
		}
		gen `b'years = `b'years9
		foreach g in 4 5 6 7 {
			replace `b'years = `b'years`g' if `b'years`g'!=.
		}

		* Years in charter by 10th, 11th and 12th grade: yearly indicators up to
		* the horizon grade, repeated years up to the grade before it.
		foreach h in 10 11 12 {
			local lastrep = `h' - 1
			foreach g in 9 4 5 6 7 {
				local terms
				forvalues k = `g'/`h' {
					local terms `terms' `b'`k'
				}
				forvalues k = `g'/`lastrep' {
					local terms `terms' `b'_repeats`k'
				}
				egen `b'years`g'by`h' = rowtotal(`terms') if gradeapp==`g'
			}
			gen `b'yearsby`h' = `b'years9by`h'
			foreach g in 4 5 6 7 {
				replace `b'yearsby`h' = `b'years`g'by`h' if `b'years`g'by`h'!=.
			}
		}

		* Two years after the lottery: the application grade, the next grade,
		* and a repeat of the application grade.
		foreach g in 9 4 5 6 7 {
			local nextgrade = `g' + 1
			egen `b'years`g'by2 = rowtotal(`b'`g' `b'`nextgrade' `b'_repeats`g') if gradeapp==`g'
		}
		gen `b'yearsby2 = `b'years9by2
		foreach g in 4 5 6 7 {
			replace `b'yearsby2 = `b'years`g'by2 if `b'years`g'by2!=.
		}

		* Grades 9-10 total, computed for every student regardless of application grade.
		egen `b'yearsLTO = rowtotal(`b'9 `b'10 `b'_repeats9)

		* Ever-attended flags: 1 when the matching total is at least one year
		* (a missing total gives 0).
		g byte ever_`b' = `b'years>=1 & `b'years!=.
		foreach sfx in by10 by11 by12 by2 LTO {
			g byte ever_`b'`sfx' = `b'years`sfx'>=1 & `b'years`sfx'!=.
		}

	}
			
			
	* Outcome scores taken from the grade after the application grade: raw
	* scores, the grade they come from, and proficient (scaled score >= 240) /
	* advanced (scaled score >= 260) flags, for ELA (e) and math (m).
	foreach newvar in twoyearsout_e twoyearsout_m twoyearsout_grade two_prof_e two_prof_m two_adv_e two_adv_m {
		g `newvar' = .
	}

	foreach g in 4 5 6 7 9 {
		local testgrade = `g' + 1
		replace twoyearsout_grade = `testgrade' if gradeapp==`g'
		foreach s in e m {
			replace twoyearsout_`s' = c_state_`s'rawsc`testgrade' if gradeapp==`g'
			* flags are set only where a scaled score exists
			replace two_prof_`s' = (`s'scaleds`testgrade'>=240) if `s'scaleds`testgrade'!=. & gradeapp==`g'
			replace two_adv_`s' = (`s'scaleds`testgrade'>=260) if `s'scaleds`testgrade'!=. & gradeapp==`g'
		}
	}
	
	

* Attach parent voter records (one student can have several parent rows),
* build link indicators, and restrict to the parent analysis sample.
merge 1:m sasid using "$data/parent_voter.dta", keep(1 3) nogen // keeps only charters that have parent info (~6k in this file that don't merge == who are they?)

g have_parent_name =  fname!=""&lname!=""

g linked_to_1 = voters_linked==1

g linked_to_mult = voters_linked>1&voters_linked!=.

g linked_to_none = voters_linked==.|voters_linked==0


*Voting outcomes only exist for those who have the potential to match to data
* The ev* wildcard also matches the ever_blottocharter* attendance flags
* created earlier. Those are not voting outcomes, and zeroing them for rows
* without a parent name corrupts Dn = ever_blottocharter - Dv later on, so they
* are excluded from the list. voters_linked is still matched by vote* and is
* zero-filled here, which the parent_match definition below relies on.
unab voteoutcomes : vote* ev* linked_*  registered*
local charterflags
cap unab charterflags : ever_blottocharter*
local voteoutcomes : list voteoutcomes - charterflags
foreach v of local voteoutcomes {
	replace `v' =0 if `v'==. &have_parent_name!=0
	replace `v'= 0 if have_parent_name==0 // or missing?
}

g parent_match = (voters_linked!=0)

* Flag one row per sasid / usasid (which row is first within an id is arbitrary).
bys sasid: gen count = _n
g first_sasid = count ==1 
drop count

bys usasid: gen count = _n
g first_usasid = count ==1 
drop count

tab yearapp parent_match if first_sasid==1, row

keep if yearapp<=2016&yearapp>=2005 // for now cutoff at 2005

unique sasid 

unique usasid

* Analysis sample: has a baseline school code and baseline demographics, in
* Boston at baseline, applied 2005-2016.
g parent_sample = 1 if ( baselinemasscode~=.&hasbaselinedemos==1& inbostonbaseline==1 &boston_sample==1 ) &yearapp<=2016&yearapp>=2005

keep if parent_sample == 1

unique sasid 

unique usasid
	  
	  
	* Recode offer / apply / initial-offer indicators from school names to
	* school codes. MATCH HS and MS share one code here and are split below.
	local schoolnames RoxPrep APR BosCol BosPrep CoaH Codman
	local schoolcodes 4840505 4120530 4490305 4160305 4370505 4380505
	foreach vartype in offer apply initial_offer {
		forvalues i = 1/6 {
			local sname : word `i' of `schoolnames'
			local scode : word `i' of `schoolcodes'
			gen `vartype'_`scode' = (`vartype'`sname'==1)
		}
		gen `vartype'_4690505 = (`vartype'MATCH_HS==1 | `vartype'MATCH_MS==1)
	}

	g twoyearsoutgrade = baselinegrade +3

	global masscodes_insample 4120530  4490305   4160305 4370505  4380505  4840505 4690505 

	* Setup for charter years by school:
	* ever_<code> = 1 when the student is enrolled in that school in a grade
	* after baseline and no later than twoyearsoutgrade, and applied to it.
	foreach m of global masscodes_insample {
		gen ever_`m' = 0
		forvalues n = 5/10 {
			gen schl_`m'_`n'=(masscode`n'==`m') if masscode`n'!=.
			*count before test w/in two years -- should keep MS and HS separate
			*only counting years when have applied to that school
			replace ever_`m'=1 if apply_`m'==1 & schl_`m'_`n'==1 & `n'>baselinegrade & `n'<=twoyearsoutgrade
		}
	}

	* Split the shared MATCH code by baseline grade: 4690506 for baseline
	* grades 4-5, 4690509 for baseline grade 8.
	foreach vartype in offer apply initial_offer ever {
		gen `vartype'_4690506 = (`vartype'_4690505==1 & inlist(baselinegrade, 4, 5))
		gen `vartype'_4690509 = (`vartype'_4690505==1 & baselinegrade==8)
		drop `vartype'_4690505
	}
	global masscodes_insample2 4120530  4490305   4160305 4370505  4380505  4840505 4690509 4690506

	*drop initial_offer waitlist_offer 
	* Dv / initial_offer_v / offer_v: attended / initially offered / offered at
	* any of the sample schools.
	g Dv = 0
	g initial_offer_v = 0
	g offer_v =0
	foreach m of global masscodes_insample2 {
		replace Dv=1 if ever_`m'==1
		replace initial_offer_v=1 if initial_offer_`m'==1
		replace offer_v=1 if offer_`m'==1
	}

	* A waitlist offer is any offer that was not an initial offer.
	g waitlist_offer_v = cond(initial_offer_v==1, 0, offer_v)

	* Counterparts for the remaining schools (_n): attendance and offers not
	* already counted in the _v variables.
	g Dn = ever_blottocharter-Dv
	g waitlist_offer_n = cond(waitlist_offer_v==1, 0, waitlist_offer)
	g initial_offer_n = cond(initial_offer_v==1, 0, initial_offer)

	* General-election vote selected by application year: 2004 for yearapp
	* <= 2008, else 2008 for <= 2012, else 2012 for <= 2016.
	g pres_before = vote_gen_2004 if yearapp<=2008
	replace pres_before = vote_gen_2008 if yearapp<=2012 & pres_before==.
	replace pres_before = vote_gen_2012 if yearapp<=2016 & pres_before==.
	 


* Variable labels (used as row labels in the tables), refreshed first-row
* flags, and save of the analysis file.
label var baselinefemale "Female"
label var baselineasian "Asian"
label var baselineblack "Black"
label var baselinehisp "Latinx"
label var baselineotherrace "Other race" 
label var baselinewhite "White"
label var baselinesped "Special education"
label var baselinelep "English learner"
label var	baselinefrpl  "Free/reduced price lunch"
label var   baseline_e "Baseline MCAS ELA"
label var baseline_m "Baseline MCAS Math"
* Full variable names instead of the abbreviations fname_l / lname_l, which
* only worked with variable abbreviation on and no other fname_l*/lname_l*
* variable present.
label var fname_length "Length of first name"
label var lname_length "Length of last name"
label var name_rank "Commonality of name"
label var linked_to_1 "Linked to one voting record"
label var linked_to_mult "Linked to multiple voting records"
label var linked_to_none "Linked to no voting records"
label var have_parent_name "Parent name present in lottery records"
label var uncommon_name "Not common name"

* Rebuild the one-row-per-id flags: rows were dropped after they were first
* created, so the row flagged earlier may no longer exist.
drop first*id
	 bys sasid: gen count = _n
	 g first_sasid = count ==1 
	 drop count

	 bys usasid: gen count = _n
	 g first_usasid = count ==1 
	 drop count
	  
save "$data/parent_analysisfile.dta", replace
 }
 
 if $t_covbal_parent==1{
estimates clear
 use "$data/parent_analysisfile.dta", clear
keep if parent_sample==1 
global Z1 initial_offer_v
global Z2 waitlist_offer_v
global D Dv

global stucovs baselinefemale baselineasian baselineblack baselinehisp baselineotherrace baselinewhite ///
	baselinesped baselinelep baselinefrpl $baselinetests

global stucovs2 baselinefemale  baselineblack baselinehisp baselineotherrace baselinewhite ///
	baselinesped baselinelep baselinefrpl $baselinetests
	
// STUDENT CHARS //

	file open  t	using t_covbal_parent1.tex, replace write
	file write t	"\begin{table}[htbp!] \centering" _n "\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}" _n ///
					"\caption{ Covariate Balance, Lotteries with Parent Information, Student Characteristics }" _n "\label{t_covbal_parent1}" _n "\small" _n ///
					"\begin{tabular*}{1\textwidth}{@{\extracolsep{\fill}}l*{3}{c}}" _n "\midrule" _n ///
					"&Non-offered &Initial Offer &Waitlist Offer\\" _n ///
					"&Mean& Differential&Differential\\" _n ///
					"&(1)&(2)&(3)\\" _n ///
					"\midrule" _n 
file close  t		

preserve
keep if first_sasid==1  
foreach y of global stucovs  {
global Y `y'
global lab_${Y} : variable label ${Y}
	qui su `y' if $Z1==0&$Z2==0
		global m_`y' = string( r(mean), "%9.3f")
	qui areg `y' $Z1  , absorb(lottogroupYR_boston ) r 
		coef_IO
	qui areg `y' $Z2 , absorb(lottogroupYR_boston)  r  
		coef_WO
file open  t	using t_covbal_parent1.tex, append write	
file write t  	"\hspace{.5cm} ${lab_`y'} &${m_`y'}& ${b1_`y'}& ${b2_`y'} \\" _n ///	
		"	& &${s1_`y'}& ${s2_`y'} \\" _n 	
file close  t			
}
	qui mvreg $stucovs2  = $Z1 db_*  
	test $Z1
global p1 =string(r(p), "%9.3f")
	qui mvreg $stucovs2   = $Z2 db_*  
	test $Z2			  
global p2 =string(r(p), "%9.3f")

count  if first_sasid==1 
local n1 = string( r(N), "%9.0fc")

restore
	file open  t 	using t_covbal_parent1.tex, append write
	file write t 	" &\textit{p}-value& ${p1}& ${p2} \\"    ///
 			"\midrule" _n "\end{tabular*}" _n ///
			"\begin{tabular*}{1\textwidth}{p{6.3in}}" _n ///
					"\footnotesize Notes:  " ///
					"This table shows means and offer differentials for student and parent characteristics in the parent lottery sample. The sample is restricted to students enrolled Boston Public Schools or Boston charter schools who applied to charter schools in 2008 to 2016 who applied to lotteries with parent name information.  Student characteristics are from the SIMS data and the data is limited to one observation per student (n = `n1'). " ///
					 "Column 1 shows the proportion of non-offered students with a given characteristic. " ///
					"Columns 2 and 3 report coefficients from regressions of the student characteristic on initial and waitlist offer dummies, including controls for risk sets (+ p$<$0.10 * p$<$0.05 ** p$<$0.01 ***p$<$0.001). " ///
					"\end{tabular*}" _n "\end{table}" _n 
	file close t
	

// PARENT CHARS //
// Start t_covbal_parent2.tex from scratch (replace): table preamble, the
// three column headings, and the Panel A heading. One output line per
// file write statement.
file open t using t_covbal_parent2.tex, replace write
file write t "\begin{table}[htbp!] \centering" _n
file write t "\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}" _n
file write t "\caption{Covariate Balance, Lotteries with Parent Information, Parent Characteristics  }" _n
file write t "\label{t_covbal_parent2}" _n
file write t "\small" _n
file write t "\begin{tabular*}{1\textwidth}{@{\extracolsep{\fill}}l*{3}{c}}" _n
file write t "\midrule" _n
file write t "&Non-offered &Initial Offer &Waitlist Offer\\" _n
file write t "&Mean& Differential&Differential\\" _n
file write t "&(1)&(2)&(3)\\" _n
file write t "\midrule" _n
file write t "(A)  Has parent name  &&\\" _n
file write t "\cmidrule{1-1}" _n
file close t
	// Panel A rows: balance on have_parent_name, standard errors clustered on sasid
foreach y of varlist have_parent_name {
	// coef_IO and coef_WO are defined outside this section; presumably they
	// fill the b1_/s1_ and b2_/s2_ globals written below -- TODO confirm.
	global Y `y'
	global lab_${Y} : variable label ${Y}

	// Column 1: mean among observations with neither offer
	quietly summarize `y' if $Z1 == 0 & $Z2 == 0
	global m_`y' = string(r(mean), "%9.3f")

	// Columns 2 and 3: offer differentials, absorbing lottogroupYR_boston
	quietly areg `y' $Z1, absorb(lottogroupYR_boston) robust cluster(sasid)
	coef_IO
	quietly areg `y' $Z2, absorb(lottogroupYR_boston) robust cluster(sasid)
	coef_WO

	file open t using t_covbal_parent2.tex, append write
	file write t "\hspace{.5cm} ${lab_`y'} &${m_`y'}& ${b1_`y'}& ${b2_`y'} \\" _n
	file write t "	& &${s1_`y'}& ${s2_`y'} \\" _n
	file close t
}

	// Panel B: spacer rows and heading, then one pair of rows per name characteristic
file open t using t_covbal_parent2.tex, append write
file write t "  \\" _n
file write t "\\" _n
file write t " \cmidrule{1-1}" _n
file write t "(B) Parent name characteristics   &&\\" _n
file write t "\cmidrule{1-1}" _n
file close t

foreach y of varlist fname_length lname_length name_rank uncommon_name {
	// coef_IO and coef_WO are defined outside this section; presumably they
	// fill the b1_/s1_ and b2_/s2_ globals written below -- TODO confirm.
	global Y `y'
	global lab_${Y} : variable label ${Y}

	// Column 1: mean among observations with neither offer
	quietly summarize `y' if $Z1 == 0 & $Z2 == 0
	global m_`y' = string(r(mean), "%9.3f")

	// Columns 2 and 3: regressions limited to have_parent_name!=0
	quietly areg `y' $Z1 if have_parent_name != 0, absorb(lottogroupYR_boston) robust cluster(sasid)
	coef_IO
	quietly areg `y' $Z2 if have_parent_name != 0, absorb(lottogroupYR_boston) robust cluster(sasid)
	coef_WO

	file open t using t_covbal_parent2.tex, append write
	file write t "\hspace{.5cm} ${lab_`y'} &${m_`y'}& ${b1_`y'}& ${b2_`y'} \\" _n
	file write t "	& &${s1_`y'}& ${s2_`y'} \\" _n
	file close t
}

// Joint tests of each offer across the four name characteristics
quietly mvreg fname_length lname_length name_rank uncommon_name = $Z1 db_* if have_parent_name != 0
test $Z1
global p1 = string(r(p), "%9.3f")

quietly mvreg fname_length lname_length name_rank uncommon_name = $Z2 db_* if have_parent_name != 0
test $Z2
global p2 = string(r(p), "%9.3f")

// Counts quoted in the table note: n2 = rows with have_parent_name!=0, n3 = all rows
count if have_parent_name != 0
local n2 = string(r(N), "%9.0fc")
count
local n3 = string(r(N), "%9.0fc")

// p-value row for Panel B, followed by the Panel C heading
file open t using t_covbal_parent2.tex, append write
file write t " &\textit{p}-value& ${p1}& ${p2} \\" _n
file write t " \cmidrule{1-1}" _n
file write t "(C) Linked to voting data   &&\\" _n
file write t "\cmidrule{1-1}" _n
file close t
// Panel C rows: balance on the three linked_to_* variables,
// standard errors clustered on sasid
foreach y of varlist linked_to_1 linked_to_mult linked_to_none {
	// coef_IO and coef_WO are defined outside this section; presumably they
	// fill the b1_/s1_ and b2_/s2_ globals written below -- TODO confirm.
	global Y `y'
	global lab_${Y} : variable label ${Y}

	// Column 1: mean among observations with neither offer
	quietly summarize `y' if $Z1 == 0 & $Z2 == 0
	global m_`y' = string(r(mean), "%9.3f")

	// Columns 2 and 3: offer differentials, absorbing lottogroupYR_boston
	quietly areg `y' $Z1, absorb(lottogroupYR_boston) robust cluster(sasid)
	coef_IO
	quietly areg `y' $Z2, absorb(lottogroupYR_boston) robust cluster(sasid)
	coef_WO

	file open t using t_covbal_parent2.tex, append write
	file write t "\hspace{.5cm} ${lab_`y'} &${m_`y'}& ${b1_`y'}& ${b2_`y'} \\" _n
	file write t "	& &${s1_`y'}& ${s2_`y'} \\" _n
	file close t
}
			// Close t_covbal_parent2.tex: end of the tabular plus the notes block.
			// The note quotes locals n3 (Panel A) and n2 (Panels B and C).
			// Spelling in the emitted note corrected: "clusted" is now "clustered".
			file open  t 	using t_covbal_parent2.tex, append write
	file write t 			"\midrule" _n "\end{tabular*}" _n ///
					"\begin{tabular*}{1\textwidth}{p{6.3in}}" _n ///
					"\footnotesize Notes:  " ///
					"This table shows means and offer differentials for student and parent characteristics. The sample is restricted to students enrolled Boston Public Schools or Boston charter schools who applied to charter schools in 2008 to 2016 who applied to lotteries with parent name information.   Parent name characteristics (Panel B) are derived from parent names and thus are conditional on existence of a parent name. There are multiple observations per student if a student has two parent names associated with their information (Panel A: N = `n3', Panels B and C: N = `n2'); in this case, standard errors are clustered by student. " ///
					"Column 1 shows the proportion of non-offered students with a given characteristic. " ///
					"Columns 2 and 3 report coefficients from regressions of the student characteristic on initial and waitlist offer dummies, including controls for risk sets (+ p$<$0.10 * p$<$0.05 ** p$<$0.01 ***p$<$0.001). " ///
					"\end{tabular*}" _n "\end{table}" _n 
	file close t

}
 
 if $t_parent_vote==1{
 
 // Load the parent analysis file and keep the parent sample
 use "$data/parent_analysisfile.dta", clear
 keep if parent_sample == 1 //&yearapp>=2011

 // Probit of linked_to_1 on name characteristics, baseline demographics,
 // and birth-year, application-year and application-grade indicators.
 // Estimated on rows with first_usasid==1 and have_parent_name==1,
 // standard errors clustered on sasid.
 probit linked_to_1 fname_length lname_length name_rank uncommon_name ///
	baselinefemale baselineasian baselineblack baselinehisp baselineotherrace ///
	baselinesped baselinelep baselinefrpl yobdum* i.yearapp i.gradeapp ///
	if first_usasid == 1 & have_parent_name == 1, cluster(sasid)

 // Predicted probability for every row where it can be computed
 predict phat

 // Mean of linked_to_1 within the estimation sample
 summarize linked_to_1 if e(sample) == 1
 local m = r(mean)

 // weight = ((1 - phat) / phat) * (m / (1 - m))
 generate weight = ((1 - phat) / phat) * ((`m') / (1 - `m'))

 // Rows without a computable weight are dropped
 keep if weight != .

// Outcome list shared by every panel of the parent voting table.
// The panels are run weighted and unweighted, on all matches and on
// the _mod versions built below.
//this includes more recent files NOT in voter analysis file
global parentvote ever_registered  after first_pos_pres ever_pres  vote_gen_2016  pres_before

// The list refers to this variable by the short name after
rename parent_reg_after_lotto after

// _mod copy of each outcome: set to missing when voters_linked is a
// nonmissing value above 1, left unchanged otherwise
foreach outcome of global parentvote {
	generate `outcome'_mod = `outcome'
	replace `outcome'_mod = . if voters_linked > 1 & voters_linked != .
}



// Panel A: 2SLS on all matches, unweighted
estimates clear

// Endogenous variable and its complement, used by every panel below
global D Dv
capture drop oneminusD
generate oneminusD = 1 - $D

capture drop *Y0

foreach y of global parentvote {
	global Y `y'
	global lab_${Y} : variable label ${Y}

	// Outcome times (1 minus D), instrumented with both offers; the
	// coefficient on oneminusD is kept as the scalar reported as CCM
	quietly generate ${Y}_Y0 = ${Y} * oneminusD
	quietly ivreg2 ${Y}_Y0 (oneminusD = $Z1 $Z2) $covariates $risksets ///
		if parent_sample == 1 & have_parent_name == 1, ///
		partial($covariates $risksets) robust cluster(sasid)
	local ccm = _b[oneminusD]

	// Main 2SLS estimate, stored under iv_(outcome) with ccm attached
	quietly eststo iv_${Y}: ivreg2 ${Y} ($D = $Z1 $Z2) $covariates $risksets ///
		if parent_sample == 1 & have_parent_name == 1, ///
		partial($covariates $risksets) robust cluster(sasid)
	quietly estadd scalar ccm = `ccm'
}

// N for the panel heading: e(sample) of the most recent estimation,
// i.e. the last outcome in the loop
count if e(sample) == 1
local n1 = string(r(N), "%9.0fc")

// Start t_parent_vote.tex (replace): preamble, column headings, Panel A heading
file open t using t_parent_vote.tex, replace write
file write t "\begin{sidewaystable}[htbp!] \centering" _n
file write t "\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}" _n
file write t "\caption{The Impact of Charter School Attendance on Parent Voting  }" _n
file write t "\label{t_parent_vote}" _n
file write t "\small" _n
file write t "\begin{tabular*}{1\textwidth}{@{\extracolsep{\fill}}l*{7}{c}}" _n
file write t "\midrule" _n
file write t "&\multicolumn{2}{c}{Voter Registration}&\multicolumn{3}{c}{Voting After Lottery}&\multicolumn{1}{c}{Placebo Test} \\" _n
file write t "\cline{2-3} \cline{4-6} \cline{7-8} \\" _n
file write t "&Ever&Registered 		&First Possible &Any   &Pres.&Presidential\\" _n
file write t "&Registered& After Lottery	&Presidential& Pres.&2016&Before Lottery\\" _n
file write t "&(1)&(2)&(3)&(4)&(5)&(6)\\" _n
file write t "\midrule" _n
file write t "(A) All Matches, Unweighted (\textit{N} = `n1') \\" _n
file write t "\midrule" _n
file close t

// Coefficient rows; the opts global supplies the shared formatting options
esttab iv_* using t_parent_vote.tex, k($D) coef($D "2SLS ") ///
	s(ccm, l("\hspace{.25cm} CCM") f(3) lay(@)) $opts

	
// Panel B: same specification as Panel A with analytic weights [aw = weight]
estimates clear
capture drop *Y0

foreach y of global parentvote {
	global Y `y'
	global lab_${Y} : variable label ${Y}

	// Outcome times (1 minus D); coefficient on oneminusD is reported as CCM
	quietly generate ${Y}_Y0 = ${Y} * oneminusD
	quietly ivreg2 ${Y}_Y0 (oneminusD = $Z1 $Z2) $covariates $risksets [aw = weight] ///
		if parent_sample == 1 & have_parent_name == 1, ///
		partial($covariates $risksets) robust cluster(sasid)
	local ccm = _b[oneminusD]

	// Main 2SLS estimate, stored under iv_(outcome) with ccm attached
	quietly eststo iv_${Y}: ivreg2 ${Y} ($D = $Z1 $Z2) $covariates $risksets [aw = weight] ///
		if parent_sample == 1 & have_parent_name == 1, ///
		partial($covariates $risksets) robust cluster(sasid)
	quietly estadd scalar ccm = `ccm'
}

// N for the panel heading, from e(sample) of the most recent estimation
count if e(sample) == 1
local n2 = string(r(N), "%9.0fc")

file open t using t_parent_vote.tex, append write
file write t " \cmidrule{1-1}" _n
file write t "(B) All Matches, IPW   (\textit{N} = `n2') \\" _n
file write t " \cmidrule{1-1}" _n
file close t

esttab iv_* using t_parent_vote.tex, k($D) coef($D "2SLS") ///
	s(ccm, l(" \hspace{.25cm}  CCM") f(3) lay(@)) $opts
// Panel C: the _mod outcomes, unweighted, limited to first_usasid==1.
// No preserve is taken here: nothing afterwards issues a restore, and the
// data are reloaded with use, clear before the collapsed panels. A dangling
// preserve would copy the data for no purpose, trigger an automatic restore
// when the do-file ends, and make any later preserve fail.
estimates clear
cap drop *Y0

foreach y of global parentvote {
	global Y `y'_mod

	global lab_${Y} : variable label ${Y}

	// Outcome times (1 minus D); coefficient on oneminusD is reported as CCM
	qui g ${Y}_Y0=${Y}*oneminusD

	qui ivreg2 ${Y}_Y0 (oneminusD = $Z1 $Z2) $covariates $risksets if  parent_sample==1 & have_parent_name==1  &first_usasid==1 ///
		, partial($covariates  $risksets) robust cluster(sasid)
	local ccm = _b[oneminusD]

	// Main 2SLS estimate, stored under iv_(outcome)_mod with ccm attached
	qui eststo iv_${Y}: ivreg2 ${Y} ($D = $Z1 $Z2) $covariates $risksets if  parent_sample==1 & have_parent_name==1  &first_usasid==1 ///
		, partial($covariates  $risksets) robust cluster(sasid)
	qui estadd scalar ccm=`ccm'
}

// N for the panel heading, from e(sample) of the most recent estimation
count if e(sample) == 1
local n3 =string(r(N),  "%9.0fc")

file open  t	using t_parent_vote.tex, append write
file write t	" \cmidrule{1-1}" _n "(C) Matched to 1, Unweighted (\textit{N} = `n3')  \\" _n " \cmidrule{1-1}" _n
file close t

	esttab iv_* 	using t_parent_vote.tex, k($D) coef($D "2SLS ") 		s(ccm , l( "\hspace{.25cm}  CCM"  ) f(3 ) lay( @  )) $opts

// Panel D: the _mod outcomes with analytic weights, limited to first_usasid==1
estimates clear
capture drop *Y0

foreach y of global parentvote {
	global Y `y'_mod
	global lab_${Y} : variable label ${Y}

	// Outcome times (1 minus D); coefficient on oneminusD is reported as CCM
	quietly generate ${Y}_Y0 = ${Y} * oneminusD
	quietly ivreg2 ${Y}_Y0 (oneminusD = $Z1 $Z2) $covariates $risksets [aw = weight] ///
		if parent_sample == 1 & have_parent_name == 1 & first_usasid == 1, ///
		partial($covariates $risksets) robust cluster(sasid)
	local ccm = _b[oneminusD]

	// Main 2SLS estimate, stored under iv_(outcome)_mod with ccm attached
	quietly eststo iv_${Y}: ivreg2 ${Y} ($D = $Z1 $Z2) $covariates $risksets [aw = weight] ///
		if parent_sample == 1 & have_parent_name == 1 & first_usasid == 1, ///
		partial($covariates $risksets) robust cluster(sasid)
	quietly estadd scalar ccm = `ccm'
}

// N for the panel heading, from e(sample) of the most recent estimation
count if e(sample) == 1
local n4 = string(r(N), "%9.0fc")

file open t using t_parent_vote.tex, append write
file write t " \cmidrule{1-1}" _n
file write t "(D) Matched to 1, IPW (\textit{N} = `n4')  \\" _n
file write t " \cmidrule{1-1}" _n
file close t

esttab iv_* using t_parent_vote.tex, k($D) coef($D "2SLS ") ///
	s(ccm, l("\hspace{.25cm}  CCM") f(3) lay(@)) $opts

// Counts of first_sasid and first_usasid rows with a parent name.
// NOTE(review): n5 and n6 are not referenced in the visible part of this
// section -- confirm whether they are still needed.
count if first_sasid == 1 & have_parent_name == 1
local n5 = string(r(N), "%9.0fc")

count if first_usasid == 1 & have_parent_name == 1
local n6 = string(r(N), "%9.0fc")


 // Reload the parent analysis file and rebuild the weight before collapsing
 use "$data/parent_analysisfile.dta", clear
 rename parent_reg_after_lotto after
 keep if parent_sample == 1 // &yearapp>=2011
 keep if have_parent_name == 1

 //this is at parent level
 // Same probit of linked_to_1 as used for the earlier panels, estimated on
 // rows with first_usasid==1 and have_parent_name==1, clustered on sasid
 probit linked_to_1 fname_length lname_length name_rank uncommon_name ///
	baselinefemale baselineasian baselineblack baselinehisp baselineotherrace ///
	baselinesped baselinelep baselinefrpl yobdum* i.yearapp i.gradeapp ///
	if first_usasid == 1 & have_parent_name == 1, cluster(sasid)

 predict phat

 // Mean of linked_to_1 within the estimation sample
 summarize linked_to_1 if e(sample) == 1
 local m = r(mean)

 // weight = ((1 - phat) / phat) * (m / (1 - m)); rows without one are dropped
 generate weight = ((1 - phat) / phat) * ((`m') / (1 - `m'))
 keep if weight != .

 // First average within sasid and usasid (parent), ...
 collapse (mean) $parentvote $covariates $risksets $D $Z1 $Z2 weight, by(sasid usasid)

 // ... then average those means within sasid (student)
 collapse (mean) $parentvote $covariates $risksets $D $Z1 $Z2 weight, by(sasid)

 // Complement of D on the collapsed data
 generate oneminusD = 1 - $D


// Panel E: 2SLS on the data collapsed to one row per sasid, unweighted
capture drop *Y0
estimates clear

foreach y of global parentvote {
	global Y `y'
	global lab_${Y} : variable label ${Y}

	// Outcome times (1 minus D); coefficient on oneminusD is reported as CCM
	quietly generate ${Y}_Y0 = ${Y} * oneminusD
	quietly ivreg2 ${Y}_Y0 (oneminusD = $Z1 $Z2) $covariates $risksets, ///
		partial($covariates $risksets) robust cluster(sasid)
	local ccm = _b[oneminusD]

	// Main 2SLS estimate, stored under iv_(outcome) with ccm attached
	quietly eststo iv_${Y}: ivreg2 ${Y} ($D = $Z1 $Z2) $covariates $risksets, ///
		partial($covariates $risksets) robust cluster(sasid)
	quietly estadd scalar ccm = `ccm'
}

// N for the panel heading, from e(sample) of the most recent estimation
// (the local n3 is reused; its earlier value has already been written out)
count if e(sample) == 1
local n3 = string(r(N), "%9.0fc")

file open t using t_parent_vote.tex, append write
file write t " \cmidrule{1-1}" _n
file write t "(E) Collapsed to Student, Unweighted (\textit{N} = `n3')  \\" _n
file write t " \cmidrule{1-1}" _n
file close t

esttab iv_* using t_parent_vote.tex, k($D) coef($D "2SLS ") ///
	s(ccm, l("\hspace{.25cm}  CCM") f(3) lay(@)) $opts

// Panel F: collapsed data with analytic weights [aw = weight]
estimates clear
capture drop *Y0

foreach y of global parentvote {
	global Y `y'
	global lab_${Y} : variable label ${Y}

	// Outcome times (1 minus D); coefficient on oneminusD is reported as CCM
	quietly generate ${Y}_Y0 = ${Y} * oneminusD
	quietly ivreg2 ${Y}_Y0 (oneminusD = $Z1 $Z2) $covariates $risksets [aw = weight], ///
		partial($covariates $risksets) robust cluster(sasid)
	local ccm = _b[oneminusD]

	// Main 2SLS estimate, stored under iv_(outcome) with ccm attached
	quietly eststo iv_${Y}: ivreg2 ${Y} ($D = $Z1 $Z2) $covariates $risksets [aw = weight], ///
		partial($covariates $risksets) robust cluster(sasid)
	quietly estadd scalar ccm = `ccm'
}

// N for the panel heading, from e(sample) of the most recent estimation
// (the local n4 is reused; its earlier value has already been written out)
count if e(sample) == 1
local n4 = string(r(N), "%9.0fc")

file open t using t_parent_vote.tex, append write
file write t " \cmidrule{1-1}" _n
file write t "(F) Collapsed to Student Level, IPW (\textit{N} = `n4')  \\" _n
file write t " \cmidrule{1-1}" _n
file close t

esttab iv_* using t_parent_vote.tex, k($D) coef($D "2SLS ") ///
	s(ccm, l("\hspace{.25cm}  CCM") f(3) lay(@)) $opts
	
	


	// Close t_parent_vote.tex: end of the tabular plus the notes block, built
	// from the standard note globals and a panel-by-panel description.
	// Spelling in the emitted note corrected: "propesity" is now "propensity".
	// NOTE(review): note_stars is not defined in the visible part of the
	// file -- confirm it is set before this section runs.
	file open  t 	using t_parent_vote.tex, append write
	file write t 		"\midrule" _n "\end{tabular*}" _n ///
					"\begin{tabular*}{1\textwidth}{p{8.4in}}" _n ///
					"\footnotesize Notes:  " ///
					"$note_iv" "$note_z" "$note_ccm" "$note_controls" "$note_sample"  ///
					"Panels A and B include all matches to the voting data, including multiple matches for parents with common names. Panels C and D include only the parents matched to a single name in the voter file, or matched to no names, excluding those matched to multiple matches. Panels E and F are averaged at the student level, which takes the mean voting outcome both for students associated with multiple parents, and those associated with multiple matches. Panels B, D, and F inverse propensity weight based on likelihood of having a match in the voter data. All regressions with multiple parents associated with a student cluster standard errors at the student level. " ///
					"$note_stars" ///
					"\end{tabular*}" _n "\end{sidewaystable}" _n 
	file close t	

	
	
		 
 }